# Who Will Defend Democracy? Evaluating Tradeoffs in Candidate Support Among Partisan Donors and Voters
# Step 6: Summary statistics (Appendix)
# Last updated: June 19, 2020

# Initial settings --------------------------------------------------------

library(tidyverse)
library(stargazer)
library(readxl)

# Load and wrangle data ---------------------------------------------------

# Donor sample: the .Rdata file supplies `respondents`, from which the
# variables shown in the summary table are derived.
load("temp/data_for_summary_stats_donors.Rdata")

donors <- respondents %>% 
  mutate(
    # Collapse the 7-point ideology item into three groups. Values that match
    # none of the codes below (including NA) stay NA.
    ideo = case_when(
      ideology == 1 ~ 1, # very conservative
      ideology == 2 ~ 1, # somewhat conservative
      ideology == 3 ~ 1, # slightly conservative
      ideology == 4 ~ 2, # moderate; middle of the road
      ideology == 5 ~ 3, # slightly liberal
      ideology == 6 ~ 3, # somewhat liberal
      ideology == 7 ~ 3  # very liberal
    ),
    sample = 1,   # sample identifier: 1 = donors
    age = age - 1 # shift age codes down by one (presumably so they start at 1 -- TODO confirm)
  ) %>% 
  rename(party = partisanship_dum)

# Public sample: the .Rdata file supplies `respondents` (replacing the donor
# data loaded earlier), which is recoded to the same variable names as `donors`.
load("temp/data_for_summary_stats_public.Rdata")

public <- respondents %>% 
  mutate(
    # Collapse the 7-point ideology item into three groups. Values that match
    # none of the codes below (including NA) stay NA.
    ideo = case_when(
      ideo7 == 1 ~ 1, # very conservative
      ideo7 == 2 ~ 1, # somewhat conservative
      ideo7 == 3 ~ 1, # slightly conservative
      ideo7 == 4 ~ 2, # moderate; middle of the road
      ideo7 == 5 ~ 3, # slightly liberal
      ideo7 == 6 ~ 3, # somewhat liberal
      ideo7 == 7 ~ 3  # very liberal
    ),
    # Collapse 7-point party ID: 1 = Republican, 2 = Democrat, 3 = independent
    # (leaners are counted with their party).
    partisanship_dum_7 = case_when(
      pid7 == 1 ~ 2, # strong democrat
      pid7 == 2 ~ 2, # not very strong democrat
      pid7 == 3 ~ 2, # democratic leaner
      pid7 == 5 ~ 1, # republican leaner
      pid7 == 6 ~ 1, # not very strong republican
      pid7 == 7 ~ 1, # strong republican
      pid7 == 4 ~ 3  # independent
    ),
    # Collapse "extremely" and "very" interested in politics
    polinterest = ifelse(polinterest == 1 & !is.na(polinterest), 2, polinterest),
    sample = 2 # sample identifier: 2 = public
  ) %>% 
  rename(
    age = `age9`,
    party = partisanship_dum_7,
    education = `educ7`,
    race = `ethnicity`,
    interest = `polinterest`
  ) %>% 
  mutate(
    # Coerce to integer codes; age and interest are then shifted down by one
    age = as.integer(age) - 1,
    gender = as.integer(gender),
    education = as.integer(education),
    race = as.integer(race),
    interest = as.integer(interest) - 1
  )

# Functions to make descriptive statistics tables --------------------------

# Unweighted percentage distribution of a single variable.
#
# Args:
#   data:      data frame that contains the variable.
#   groupby:   unquoted name of the column to tabulate.
#   var_label: label written to the `var` column of every output row.
#
# Returns a data frame with three columns: `var` (the label), `group` (the
# observed values of `groupby`) and `pct` (a string such as "12.3%").
# Rows where `groupby` is NA are removed before the shares are computed, so
# percentages are taken over non-missing cases only.
show_distribution <- function(data, groupby, var_label){
  
  g <- enquo(groupby)
  
  data %>% 
    count(!!g) %>% 
    filter(!is.na(!!g)) %>% # Exclude NA
    mutate(pct = 100 * (n / sum(n)),
           pct = round(pct, 1),
           # format() works on the whole column, so values share one width
           pct = format(pct, nsmall = 1),
           pct = paste0(pct, "%")) %>% 
    mutate(var = var_label) %>%
    # Rename via the captured quosure rather than an external character
    # vector, which tidyselect treats as ambiguous and has deprecated.
    select(var, group = !!g, pct)
  
}

# Weighted percentage distribution of a single variable.
#
# Args:
#   data:      data frame that contains the variable and a `weight` column,
#              which is used as the weight for every row.
#   groupby:   unquoted name of the column to tabulate.
#   var_label: label written to the `var` column of every output row.
#
# Returns a data frame with three columns: `var` (the label), `group` (the
# observed values of `groupby`) and `pct` (a string such as "12.3%").
# Rows where `groupby` is NA are removed before the shares are computed, so
# percentages are taken over the summed weights of non-missing cases only.
show_distribution_weighted <- function(data, groupby, var_label){
  
  g <- enquo(groupby)
  
  data %>% 
    count(!!g, wt = weight) %>% 
    filter(!is.na(!!g)) %>% # Exclude NA
    mutate(pct = 100 * (n / sum(n)),
           pct = round(pct, 1),
           # format() works on the whole column, so values share one width
           pct = format(pct, nsmall = 1),
           pct = paste0(pct, "%")) %>% 
    mutate(var = var_label) %>%
    # Rename via the captured quosure rather than an external character
    # vector, which tidyselect treats as ambiguous and has deprecated.
    select(var, group = !!g, pct)
  
}

# Function to save tables -------------------------------------------------

# Write a table to disk twice with stargazer: once as plain text
# ("tables/<title>.txt") and once as LaTeX ("tables/<title>.tex").
#
# Args:
#   table: data frame to print as-is (summary = FALSE, no row names).
#   title: file name stem, without directory or extension.
save_tables <- function(table, title){
  
  # stargazer output type -> file extension
  extensions <- c(text = "txt", latex = "tex")
  
  for (type in names(extensions)){
    stargazer(table, 
              summary = FALSE,
              type = type, 
              rownames = FALSE,
              out = paste0("tables/", title, ".", extensions[[type]]))
  }
  
}

# Specify variable names and category names -------------------------------

# Display name of each variable (variaN) followed by the labels of its
# categories (groupN). Labels are listed in the order in which the category
# rows are sorted in the summary tables.

# Age
varia1 <- "Age"
group1 <- c(
  "18-24",
  "25-34",
  "35-44",
  "45-54",
  "55-64",
  "65-74",
  "75-84",
  "85 or older"
)

# Sex (no "other")
varia2 <- "Sex"
group2 <- c("Male", "Female")

# Education
varia3 <- "Education"
group3 <- c(
  "Did not graduate from high school",
  "High school diploma or the equivalent (GED)",
  "Some college",
  "Associate's degree",
  "Bachelor’s degree",
  "Master’s degree",
  "Professional or doctorate degree"
)

# Race
varia4 <- "Race"
group4 <- c(
  "White",
  "Black or African American",
  "American Indian or Alaska Native",
  "Asian/Pacific Islander",
  "Multi-racial",
  "Hispanic/Latino/Chicano/a",
  "Other"
)

# Party
varia5 <- "Party"
group5 <- c("Republican", "Democrat", "Neither")

# Ideology
varia6 <- "Ideology"
group6 <- c("Conservative", "Moderate", "Liberal")

# Trump approval
varia7 <- "Trump approval"
group7 <- c("Approve of Donald Trump", "Disapprove of Donald Trump")

# Political interest
varia8 <- "Political interest"
group8 <- c(
  "Very interested",
  "Somewhat interested",
  "Not very interested",
  "Not at all interested"
)

# Summary statistics: Donors ----------------------------------------------------------------

# Header rows: one per variable, with group code 0 so that arrange() places
# each header ahead of its category rows (assumes all category codes are
# greater than 0 -- TODO confirm against the data).
var_labels <- data.frame(
  var = c(varia1, varia2, varia3, varia4, varia5, varia6, varia7, varia8),
  group = 0,
  stringsAsFactors = FALSE
)

# Stack the unweighted distributions of all variables plus the header rows,
# then sort by variable (in table order) and by category code within variable.
out <- bind_rows(
  show_distribution(donors, age, varia1),
  show_distribution(donors, gender, varia2),
  show_distribution(donors, education, varia3),
  show_distribution(donors, race, varia4),
  show_distribution(donors, party, varia5),
  show_distribution(donors, ideo, varia6),
  show_distribution(donors, trump_dum, varia7),
  show_distribution(donors, interest, varia8),
  var_labels
) %>% 
  mutate(var = factor(var, levels = c(varia1, varia2, varia3, varia4, varia5, varia6, varia7, varia8))) %>% 
  arrange(var, group) %>% 
  mutate_if(is.factor, as.character)

# Replace the numeric codes with display labels. Labels are matched to rows by
# position (header first, then categories in sorted order), so the number of
# rows must equal the number of labels. Without this check, a category that is
# absent from the data would make the assignment truncate or recycle the labels
# with only a warning, silently mislabeling the remaining rows.
donor_group_labels <- list(group1, group2, group3, group4, group5, group6, group7, group8)
names(donor_group_labels) <- c(varia1, varia2, varia3, varia4, varia5, varia6, varia7, varia8)

for (variable in names(donor_group_labels)) {
  variable_rows <- out$var == variable
  row_labels <- c(variable, donor_group_labels[[variable]])
  if (sum(variable_rows) != length(row_labels)) {
    stop("Donor sample: '", variable, "' has ", sum(variable_rows) - 1,
         " observed categories but ", length(row_labels) - 1, " labels.",
         call. = FALSE)
  }
  out$group[variable_rows] <- row_labels
}

# Keep only the two display columns and write the table to disk.
out <- out %>%
  select("Characteristics" = group, 
         "Donor sample" = pct)

save_tables(out, "donor_summary_stats")

# Summary statistics: Public ----------------------------------------------------------------

# Stack the weighted distributions of all variables plus the header rows
# (`var_labels`, group code 0), then sort by variable (in table order) and by
# category code within variable.
out2 <- bind_rows(
  show_distribution_weighted(public, age, varia1),
  show_distribution_weighted(public, gender, varia2),
  show_distribution_weighted(public, education, varia3),
  show_distribution_weighted(public, race, varia4),
  show_distribution_weighted(public, party, varia5),
  show_distribution_weighted(public, ideo, varia6),
  show_distribution_weighted(public, trump_dum, varia7),
  show_distribution_weighted(public, interest, varia8),
  var_labels
) %>% 
  mutate(var = factor(var, levels = c(varia1, varia2, varia3, varia4, varia5, varia6, varia7, varia8))) %>% 
  arrange(var, group) %>% 
  mutate_if(is.factor, as.character)

# Replace the numeric codes with display labels. Labels are matched to rows by
# position (header first, then categories in sorted order), so the number of
# rows must equal the number of labels. Without this check, a category that is
# absent from the data would make the assignment truncate or recycle the labels
# with only a warning, silently mislabeling the remaining rows.
public_group_labels <- list(group1, group2, group3, group4, group5, group6, group7, group8)
names(public_group_labels) <- c(varia1, varia2, varia3, varia4, varia5, varia6, varia7, varia8)

for (variable in names(public_group_labels)) {
  variable_rows <- out2$var == variable
  row_labels <- c(variable, public_group_labels[[variable]])
  if (sum(variable_rows) != length(row_labels)) {
    stop("Public sample: '", variable, "' has ", sum(variable_rows) - 1,
         " observed categories but ", length(row_labels) - 1, " labels.",
         call. = FALSE)
  }
  out2$group[variable_rows] <- row_labels
}

# Keep only the two display columns and write the table to disk.
out2 <- out2 %>%
  select("Characteristics" = group, 
         "Public sample" = pct)

save_tables(out2, "public_summary_stats")